Problem Set 5

Github link: https://github.com/Soul22238/STATS506/tree/main/ProblemSet4

Problem Set 5

Problem 1

a

setClass("waldCI", slots = c(level = "numeric",
                   lb = "numeric",
                   ub = "numeric",
                   mean = "numeric",
                   sterr = "numeric"))
## Constructor
waldCI <- function(level = 0.95,
                    mean = NULL,
                    sterr = NULL,
                    lb = NULL,
                    ub = NULL) {
   z <- qnorm(1 - (1 - level)/2)
   if (!is.null(mean) && !is.null(sterr)){
     lb <- mean - z * sterr
     ub <- mean + z * sterr
   } else if (!is.null(lb) && !is.null(ub)) {
    mean <- (lb + ub) / 2
    sterr <- (ub - lb) / (2 * z)
  } else {
    stop("You must provide either (mean, sterr) or (lb, ub).")
  }
   return(new("waldCI",level = level, mean = mean, sterr = sterr, lb = lb, ub = ub))
}

## Validator
setValidity("waldCI", function(object){
  if (!is.finite(object@level) || object@level >= 1 || object@level <= 0){
    return(paste("@confidence level =", object@level, "is not a valid confidence level (must be 0 < level < 1)."))
  }

  if (!is.finite(object@ub) || !is.finite(object@lb) || object@ub <= object@lb) {
    if (is.finite(object@ub) && is.finite(object@lb) && object@ub <= object@lb) {
      return(paste("@upper bound", object@ub, "must be strictly greater than @lower bound", object@lb))
    } 
  }
  
  if (is.finite(object@sterr) && object@sterr < 0){
    return(paste("@standard error", object@sterr, "must not be < 0."))
  }
  
  return (TRUE)
})
Class "waldCI" [in ".GlobalEnv"]

Slots:
                                              
Name:    level      lb      ub    mean   sterr
Class: numeric numeric numeric numeric numeric
## show method
##' @title Display a `waldCI` object
##' @param object A `waldCI` object
setMethod("show", "waldCI",
  function(object) {
    cat("confidence level: ", object@level)
    cat("\n")
    cat("mean: ", object@mean)
    cat("\n")
    cat("standard error: ", object@sterr)
    cat("\n")
    cat("upper bound: ", object@ub)
    cat("\n")
    cat("lower bound: ", object@lb)
    cat("\n")
    return(invisible(object))
  }
)
# accessors
setGeneric("lb",
           function(object) {
             standardGeneric("lb")
           })
[1] "lb"
setMethod("lb", "waldCI",
          function(object) {
            return(slot(object, "lb"))
          })
setGeneric("ub",
           function(object) {
             standardGeneric("ub")
           })
[1] "ub"
setMethod("ub", "waldCI",
          function(object) {
            return(slot(object, "ub"))
          })
setGeneric("mean",
           function(object) {
             standardGeneric("mean")
           })
Creating a new generic function for 'mean' in the global environment
[1] "mean"
setMethod("mean", "waldCI",
          function(object) {
            return(slot(object, "mean"))
          })
setGeneric("sterr",
           function(object) {
             standardGeneric("sterr")
           })
[1] "sterr"
setMethod("sterr", "waldCI",
          function(object) {
            return(slot(object, "sterr"))
          })
setGeneric("level",
           function(object) {
             standardGeneric("level")
           })
[1] "level"
setMethod("level", "waldCI",
          function(object) {
            return(slot(object, "level"))
          })
# Setters
setGeneric("lb<-",
           function(object, value) {
             standardGeneric("lb<-")
           })
[1] "lb<-"
setMethod("lb<-", "waldCI",
  function(object, value) {
    object@lb <- value
    validObject(object) 
    return(object)
  }
)

setGeneric("ub<-",
           function(object, value) {
             standardGeneric("ub<-")
           })
[1] "ub<-"
setMethod("ub<-", "waldCI",
  function(object, value) {
    object@ub <- value
    validObject(object) 
    return(object)
  }
)

setGeneric("mean<-",
           function(object, value) {
             standardGeneric("mean<-")
           })
[1] "mean<-"
setMethod("mean<-", "waldCI",
  function(object, value) {
    object@mean <- value
    validObject(object) 
    return(object)
  }
)

setGeneric("sterr<-",
           function(object, value) {
             standardGeneric("sterr<-")
           })
[1] "sterr<-"
setMethod("sterr<-", "waldCI",
  function(object, value) {
    object@sterr <- value
    validObject(object) 
    return(object)
  }
)

setGeneric("level<-",
           function(object, value) {
             standardGeneric("level<-")
           })
[1] "level<-"
setMethod("level<-", "waldCI",
  function(object, value) {
    object@level <- value
    validObject(object) 
    return(object)
  }
)

# Contains
setGeneric("contains",
           function(object, value) {
             standardGeneric("contains")
           })
[1] "contains"
setMethod("contains", c("waldCI", "numeric"),
  function(object, value) {
    value >= object@lb & value <= object@ub
  })

# Overlap
setGeneric("overlap",function(object1, object2) {
             standardGeneric("overlap")
           })
[1] "overlap"
setMethod("overlap", c("waldCI", "waldCI"),
  function(object1, object2) {
    !(object1@ub < object2@lb | object2@ub < object1@lb)
  })

# as.numeric

setMethod("as.numeric", "waldCI",
  function(x) {
  c(x@lb, x@ub)
})

# transformCI
setGeneric("transformCI",
           function(object, f) {
             standardGeneric("transformCI")
           })
[1] "transformCI"
setMethod("transformCI",
          signature(object = "waldCI", f = "function"),
          function(object, f) {
            # warn the user
            warning("Only monotonic functions are meaningful for transforming confidence intervals.")

            # apply f to bounds
            new_lb <- f(object@lb)
            new_ub <- f(object@ub)

            # if decreasing monotone, swap
            if (new_lb > new_ub) {
              tmp <- new_lb
              new_lb <- new_ub
              new_ub <- tmp
            }

            new_obj <- waldCI(level = object@level,
                              lb = new_lb,
                              ub = new_ub,
                              mean = NULL,
                              sterr = NULL)
            validObject(new_obj)
            return(new_obj)
          })

b

ci1 <- waldCI(level = 0.95, lb = 17.2, ub = 24.7)
ci2 <- waldCI(level = 0.99, mean = 13, sterr = 2.5)
ci3 <- waldCI(level = 0.75, lb = 27.43, ub = 39.22)
ci1
confidence level:  0.95
mean:  20.95
standard error:  1.9133
upper bound:  24.7
lower bound:  17.2
ci2
confidence level:  0.99
mean:  13
standard error:  2.5
upper bound:  19.43957
lower bound:  6.560427
ci3
confidence level:  0.75
mean:  33.325
standard error:  5.12453
upper bound:  39.22
lower bound:  27.43
as.numeric(ci1)
[1] 17.2 24.7
as.numeric(ci2)
[1]  6.560427 19.439573
as.numeric(ci3)
[1] 27.43 39.22
lb(ci2)
[1] 6.560427
ub(ci2)
[1] 19.43957
mean(ci1)
[1] 20.95
sterr(ci3)
[1] 5.12453
level(ci2)
[1] 0.99
lb(ci2) <- 10.5
mean(ci3) <- 34
level(ci3) <- .8
contains(ci1, 17)
[1] FALSE
contains(ci3, 44)
[1] FALSE
overlap(ci1, ci2)
[1] TRUE
eci1 <- transformCI(ci1, sqrt)
Warning in transformCI(ci1, sqrt): Only monotonic functions are meaningful for
transforming confidence intervals.
eci1
confidence level:  0.95
mean:  4.558599
standard error:  0.2098562
upper bound:  4.969909
lower bound:  4.147288
mean(transformCI(ci2, log))
Warning in transformCI(ci2, log): Only monotonic functions are meaningful for
transforming confidence intervals.
[1] 2.659343

c

# Expected not allowed
waldCI(level = 0.95, mean = 10, sterr = -1)
Error in validObject(.Object): invalid class "waldCI" object: @upper bound 8.04003601545995 must be strictly greater than @lower bound 11.9599639845401
waldCI(level = 0.95, lb = 5, ub = 4)
Error in validObject(.Object): invalid class "waldCI" object: @upper bound 4 must be strictly greater than @lower bound 5
waldCI(level = 0.95, lb = 5, ub = 5)
Error in validObject(.Object): invalid class "waldCI" object: @upper bound 5 must be strictly greater than @lower bound 5
waldCI(level = 1, mean = 10, sterr = 1)
Error in validObject(.Object): invalid class "waldCI" object: @confidence level = 1 is not a valid confidence level (must be 0 < level < 1).
ci_valid <- waldCI(lb = 10, ub = 20)
lb(ci_valid) <- 25
Error in validObject(object): invalid class "waldCI" object: @upper bound 20 must be strictly greater than @lower bound 25

Problem 3

a

There are one significant spike and about 4 minor spikes. I queried chatgpt for the grammar of plotly and revised them.

covid <- read.csv("us-states.txt")
library(dplyr)

Attaching package: 'dplyr'
The following object is masked _by_ '.GlobalEnv':

    contains
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(plotly)
Warning: package 'plotly' was built under R version 4.3.3
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.3.3

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
national_cases <- covid %>%
  # Convert the date column to the Date format
  mutate(date = as.Date(date)) %>%
  # Group data by date
  group_by(date) %>%
  # Summarize to calculate the national daily average cases
  summarise(
    National_Cases_Avg = sum(cases_avg, na.rm = TRUE),
    .groups = 'drop'
  )

# 2. Create the Interactive Plot using plot_ly()
covid_spikes_plot_plotly <- national_cases %>%
  # Start the plot with plot_ly, defining x, y, type, mode, and line style
  plot_ly(x = ~date, y = ~National_Cases_Avg,
          type = 'scatter', mode = 'lines',
          line = list(color = 'darkred', width = 1)) %>%

  layout(
    title = list(text = "U.S. National COVID-19 Waves<br><sup>7-Day Average New Cases</sup>"),

    xaxis = list(
      title = "Date",
      dtick = "M6",          
      tickformat = "%Y %b"   # Display format: 2020 Jan
    ),

    yaxis = list(title = "Avg. New Cases (National)")
  )

# 3. Print the interactive chart
covid_spikes_plot_plotly

b

I calculate the total cumulative covid case rate per 100K population for every state and ranks them from highest to lowest to get the highest and lowest overall rates per population. The main differences are the Maine has a huge spike in 2022, whereas Rhode Island experience minor spikes at the same time.

state_ranking <- covid %>%

  mutate(date = as.Date(date)) %>%
  group_by(state) %>%
  summarise(Cumulative_Rate = sum(cases_avg_per_100k, na.rm = TRUE), .groups = 'drop') %>%
  # Arrange states in descending order of the cumulative rate
  arrange(desc(Cumulative_Rate))

# Get the names of the highest and lowest ranked states
highest_state_name <- head(state_ranking$state, 1)
lowest_state_name <- tail(state_ranking$state, 1)

# Filter the main data to include only the two states for comparison
comparison_data <- covid %>%
  filter(state %in% c(highest_state_name, lowest_state_name)) %>%
  mutate(date = as.Date(date))

trajectory_plot_plotly <- comparison_data %>%
  # Start the plot with plot_ly
  plot_ly(x = ~date, 
          y = ~cases_avg_per_100k, 
          color = ~state,  
          type = 'scatter', 
          mode = 'lines') %>%
  
  # Set the chart layout (titles and labels)
  layout(
    title = "COVID-19 Trajectory",
    xaxis = list(title = "Date"),
    yaxis = list(title = "7-Day Avg. Cases per 100k")
  )

trajectory_plot_plotly

c

I referenced the solution of Problem Set 5 and chatgpt for plotly grammar. We can see from this messy plot that the first spike is approximately from 2020-03-21 to 2020-05-20.

library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
covid20 <- covid %>%
  mutate(Date = as.Date(date)) %>%
  filter(year(Date) == 2020) %>%
  select(state, date, cases_avg_per_100k)

trajectory_plot_no_legend <- covid20 %>%
  plot_ly(x = ~date, 
          y = ~cases_avg_per_100k, 
          split = ~state,         
          type = 'scatter', 
          mode = 'lines',
          showlegend = FALSE) %>% 
  
  layout(
    yaxis = list(title = "COVID cases per 100k")
  )

trajectory_plot_no_legend

I restrict the data to have cases_avg_per_100k to be larger than 15 and zoom into March, April, and May. We can know from the plot that the top 5 states are New York, New Jersey, Louisiana, Gaum, and Connecticut.

covid20_mar_may <- covid20 %>%
  mutate(Date = as.Date(date)) %>%
  filter(cases_avg_per_100k > 15) %>%
  filter(month(Date) %in% c(3, 4, 5)) 

trajectory_plot_plotly <- covid20_mar_may %>%
  plot_ly(x = ~Date, 
          y = ~cases_avg_per_100k, 
          split = ~state,        
          type = 'scatter', 
          mode = 'lines',
          showlegend = TRUE) %>% 
  
  layout(
    title = "COVID-19 Cases per 100k (March - May)",
    yaxis = list(title = "COVID cases per 100k"),
    xaxis = list(title = "Date",
                 type = "date")
  )

trajectory_plot_plotly